1 package org.apache.lucene.index;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 import java.io.IOException;
21 import java.nio.charset.StandardCharsets;
22 import java.util.ArrayList;
23 import java.util.Arrays;
24 import java.util.Collections;
25 import java.util.HashMap;
26 import java.util.Iterator;
27 import java.util.List;
28 import java.util.Map;
29 import java.util.Random;
30 import java.util.concurrent.atomic.AtomicReference;
31
32 import org.apache.lucene.analysis.MockAnalyzer;
33 import org.apache.lucene.codecs.Codec;
34 import org.apache.lucene.codecs.StoredFieldsFormat;
35 import org.apache.lucene.codecs.simpletext.SimpleTextCodec;
36 import org.apache.lucene.document.Document;
37 import org.apache.lucene.document.DoubleField;
38 import org.apache.lucene.document.Field;
39 import org.apache.lucene.document.Field.Store;
40 import org.apache.lucene.document.FieldType;
41 import org.apache.lucene.document.FieldType.NumericType;
42 import org.apache.lucene.document.FloatField;
43 import org.apache.lucene.document.IntField;
44 import org.apache.lucene.document.LongField;
45 import org.apache.lucene.document.NumericDocValuesField;
46 import org.apache.lucene.document.StoredField;
47 import org.apache.lucene.document.StringField;
48 import org.apache.lucene.document.TextField;
49 import org.apache.lucene.search.IndexSearcher;
50 import org.apache.lucene.search.NumericRangeQuery;
51 import org.apache.lucene.search.Query;
52 import org.apache.lucene.search.TermQuery;
53 import org.apache.lucene.search.TopDocs;
54 import org.apache.lucene.store.Directory;
55 import org.apache.lucene.store.MMapDirectory;
56 import org.apache.lucene.store.MockDirectoryWrapper;
57 import org.apache.lucene.store.MockDirectoryWrapper.Throttling;
58 import org.apache.lucene.util.BytesRef;
59 import org.apache.lucene.util.IOUtils;
60 import org.apache.lucene.util.TestUtil;
61
62 import com.carrotsearch.randomizedtesting.generators.RandomInts;
63 import com.carrotsearch.randomizedtesting.generators.RandomPicks;
64 import com.carrotsearch.randomizedtesting.generators.RandomStrings;
65
66
67
68
69
70
71
72 public abstract class BaseStoredFieldsFormatTestCase extends BaseIndexFileFormatTestCase {
73
74 @Override
75 protected void addRandomFields(Document d) {
76 final int numValues = random().nextInt(3);
77 for (int i = 0; i < numValues; ++i) {
78 d.add(new StoredField("f", TestUtil.randomSimpleString(random(), 100)));
79 }
80 }
81
82 public void testRandomStoredFields() throws IOException {
83 Directory dir = newDirectory();
84 Random rand = random();
85 RandomIndexWriter w = new RandomIndexWriter(rand, dir, newIndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(TestUtil.nextInt(rand, 5, 20)));
86
87 final int docCount = atLeast(200);
88 final int fieldCount = TestUtil.nextInt(rand, 1, 5);
89
90 final List<Integer> fieldIDs = new ArrayList<>();
91
92 FieldType customType = new FieldType(TextField.TYPE_STORED);
93 customType.setTokenized(false);
94 Field idField = newField("id", "", customType);
95
96 for(int i=0;i<fieldCount;i++) {
97 fieldIDs.add(i);
98 }
99
100 final Map<String,Document> docs = new HashMap<>();
101
102 if (VERBOSE) {
103 System.out.println("TEST: build index docCount=" + docCount);
104 }
105
106 FieldType customType2 = new FieldType();
107 customType2.setStored(true);
108 for(int i=0;i<docCount;i++) {
109 Document doc = new Document();
110 doc.add(idField);
111 final String id = ""+i;
112 idField.setStringValue(id);
113 docs.put(id, doc);
114 if (VERBOSE) {
115 System.out.println("TEST: add doc id=" + id);
116 }
117
118 for(int field: fieldIDs) {
119 final String s;
120 if (rand.nextInt(4) != 3) {
121 s = TestUtil.randomUnicodeString(rand, 1000);
122 doc.add(newField("f"+field, s, customType2));
123 } else {
124 s = null;
125 }
126 }
127 w.addDocument(doc);
128 if (rand.nextInt(50) == 17) {
129
130 Collections.shuffle(fieldIDs, random());
131 }
132 if (rand.nextInt(5) == 3 && i > 0) {
133 final String delID = ""+rand.nextInt(i);
134 if (VERBOSE) {
135 System.out.println("TEST: delete doc id=" + delID);
136 }
137 w.deleteDocuments(new Term("id", delID));
138 docs.remove(delID);
139 }
140 }
141
142 if (VERBOSE) {
143 System.out.println("TEST: " + docs.size() + " docs in index; now load fields");
144 }
145 if (docs.size() > 0) {
146 String[] idsList = docs.keySet().toArray(new String[docs.size()]);
147
148 for(int x=0;x<2;x++) {
149 IndexReader r = w.getReader();
150 IndexSearcher s = newSearcher(r);
151
152 if (VERBOSE) {
153 System.out.println("TEST: cycle x=" + x + " r=" + r);
154 }
155
156 int num = atLeast(1000);
157 for(int iter=0;iter<num;iter++) {
158 String testID = idsList[rand.nextInt(idsList.length)];
159 if (VERBOSE) {
160 System.out.println("TEST: test id=" + testID);
161 }
162 TopDocs hits = s.search(new TermQuery(new Term("id", testID)), 1);
163 assertEquals(1, hits.totalHits);
164 Document doc = r.document(hits.scoreDocs[0].doc);
165 Document docExp = docs.get(testID);
166 for(int i=0;i<fieldCount;i++) {
167 assertEquals("doc " + testID + ", field f" + fieldCount + " is wrong", docExp.get("f"+i), doc.get("f"+i));
168 }
169 }
170 r.close();
171 w.forceMerge(1);
172 }
173 }
174 w.close();
175 dir.close();
176 }
177
178
179 public void testStoredFieldsOrder() throws Throwable {
180 Directory d = newDirectory();
181 IndexWriter w = new IndexWriter(d, newIndexWriterConfig(new MockAnalyzer(random())));
182 Document doc = new Document();
183
184 FieldType customType = new FieldType();
185 customType.setStored(true);
186 doc.add(newField("zzz", "a b c", customType));
187 doc.add(newField("aaa", "a b c", customType));
188 doc.add(newField("zzz", "1 2 3", customType));
189 w.addDocument(doc);
190 IndexReader r = w.getReader();
191 Document doc2 = r.document(0);
192 Iterator<IndexableField> it = doc2.getFields().iterator();
193 assertTrue(it.hasNext());
194 Field f = (Field) it.next();
195 assertEquals(f.name(), "zzz");
196 assertEquals(f.stringValue(), "a b c");
197
198 assertTrue(it.hasNext());
199 f = (Field) it.next();
200 assertEquals(f.name(), "aaa");
201 assertEquals(f.stringValue(), "a b c");
202
203 assertTrue(it.hasNext());
204 f = (Field) it.next();
205 assertEquals(f.name(), "zzz");
206 assertEquals(f.stringValue(), "1 2 3");
207 assertFalse(it.hasNext());
208 r.close();
209 w.close();
210 d.close();
211 }
212
213
214 public void testBinaryFieldOffsetLength() throws IOException {
215 Directory dir = newDirectory();
216 IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
217 byte[] b = new byte[50];
218 for(int i=0;i<50;i++)
219 b[i] = (byte) (i+77);
220
221 Document doc = new Document();
222 Field f = new StoredField("binary", b, 10, 17);
223 byte[] bx = f.binaryValue().bytes;
224 assertTrue(bx != null);
225 assertEquals(50, bx.length);
226 assertEquals(10, f.binaryValue().offset);
227 assertEquals(17, f.binaryValue().length);
228 doc.add(f);
229 w.addDocument(doc);
230 w.close();
231
232 IndexReader ir = DirectoryReader.open(dir);
233 Document doc2 = ir.document(0);
234 IndexableField f2 = doc2.getField("binary");
235 b = f2.binaryValue().bytes;
236 assertTrue(b != null);
237 assertEquals(17, b.length, 17);
238 assertEquals(87, b[0]);
239 ir.close();
240 dir.close();
241 }
242
243 public void testNumericField() throws Exception {
244 Directory dir = newDirectory();
245 RandomIndexWriter w = new RandomIndexWriter(random(), dir);
246 final int numDocs = atLeast(500);
247 final Number[] answers = new Number[numDocs];
248 final NumericType[] typeAnswers = new NumericType[numDocs];
249 for(int id=0;id<numDocs;id++) {
250 Document doc = new Document();
251 final Field nf;
252 final Field sf;
253 final Number answer;
254 final NumericType typeAnswer;
255 if (random().nextBoolean()) {
256
257 if (random().nextBoolean()) {
258 final float f = random().nextFloat();
259 answer = Float.valueOf(f);
260 nf = new FloatField("nf", f, Field.Store.NO);
261 sf = new StoredField("nf", f);
262 typeAnswer = NumericType.FLOAT;
263 } else {
264 final double d = random().nextDouble();
265 answer = Double.valueOf(d);
266 nf = new DoubleField("nf", d, Field.Store.NO);
267 sf = new StoredField("nf", d);
268 typeAnswer = NumericType.DOUBLE;
269 }
270 } else {
271
272 if (random().nextBoolean()) {
273 final int i = random().nextInt();
274 answer = Integer.valueOf(i);
275 nf = new IntField("nf", i, Field.Store.NO);
276 sf = new StoredField("nf", i);
277 typeAnswer = NumericType.INT;
278 } else {
279 final long l = random().nextLong();
280 answer = Long.valueOf(l);
281 nf = new LongField("nf", l, Field.Store.NO);
282 sf = new StoredField("nf", l);
283 typeAnswer = NumericType.LONG;
284 }
285 }
286 doc.add(nf);
287 doc.add(sf);
288 answers[id] = answer;
289 typeAnswers[id] = typeAnswer;
290 FieldType ft = new FieldType(IntField.TYPE_STORED);
291 ft.setNumericPrecisionStep(Integer.MAX_VALUE);
292 doc.add(new IntField("id", id, ft));
293 doc.add(new NumericDocValuesField("id", id));
294 w.addDocument(doc);
295 }
296 final DirectoryReader r = w.getReader();
297 w.close();
298
299 assertEquals(numDocs, r.numDocs());
300
301 for(LeafReaderContext ctx : r.leaves()) {
302 final LeafReader sub = ctx.reader();
303 final NumericDocValues ids = DocValues.getNumeric(sub, "id");
304 for(int docID=0;docID<sub.numDocs();docID++) {
305 final Document doc = sub.document(docID);
306 final Field f = (Field) doc.getField("nf");
307 assertTrue("got f=" + f, f instanceof StoredField);
308 assertEquals(answers[(int) ids.get(docID)], f.numericValue());
309 }
310 }
311 r.close();
312 dir.close();
313 }
314
315 public void testIndexedBit() throws Exception {
316 Directory dir = newDirectory();
317 RandomIndexWriter w = new RandomIndexWriter(random(), dir);
318 Document doc = new Document();
319 FieldType onlyStored = new FieldType();
320 onlyStored.setStored(true);
321 doc.add(new Field("field", "value", onlyStored));
322 doc.add(new StringField("field2", "value", Field.Store.YES));
323 w.addDocument(doc);
324 IndexReader r = w.getReader();
325 w.close();
326 assertEquals(IndexOptions.NONE, r.document(0).getField("field").fieldType().indexOptions());
327 assertNotNull(r.document(0).getField("field2").fieldType().indexOptions());
328 r.close();
329 dir.close();
330 }
331
332 public void testReadSkip() throws IOException {
333 Directory dir = newDirectory();
334 IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
335 iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
336 RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);
337
338 FieldType ft = new FieldType();
339 ft.setStored(true);
340 ft.freeze();
341
342 final String string = TestUtil.randomSimpleString(random(), 50);
343 final byte[] bytes = string.getBytes(StandardCharsets.UTF_8);
344 final long l = random().nextBoolean() ? random().nextInt(42) : random().nextLong();
345 final int i = random().nextBoolean() ? random().nextInt(42) : random().nextInt();
346 final float f = random().nextFloat();
347 final double d = random().nextDouble();
348
349 List<Field> fields = Arrays.asList(
350 new Field("bytes", bytes, ft),
351 new Field("string", string, ft),
352 new LongField("long", l, Store.YES),
353 new IntField("int", i, Store.YES),
354 new FloatField("float", f, Store.YES),
355 new DoubleField("double", d, Store.YES)
356 );
357
358 for (int k = 0; k < 100; ++k) {
359 Document doc = new Document();
360 for (Field fld : fields) {
361 doc.add(fld);
362 }
363 iw.w.addDocument(doc);
364 }
365 iw.commit();
366
367 final DirectoryReader reader = DirectoryReader.open(dir);
368 final int docID = random().nextInt(100);
369 for (Field fld : fields) {
370 String fldName = fld.name();
371 final Document sDoc = reader.document(docID, Collections.singleton(fldName));
372 final IndexableField sField = sDoc.getField(fldName);
373 if (Field.class.equals(fld.getClass())) {
374 assertEquals(fld.binaryValue(), sField.binaryValue());
375 assertEquals(fld.stringValue(), sField.stringValue());
376 } else {
377 assertEquals(fld.numericValue(), sField.numericValue());
378 }
379 }
380 reader.close();
381 iw.close();
382 dir.close();
383 }
384
385 public void testEmptyDocs() throws IOException {
386 Directory dir = newDirectory();
387 IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
388 iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
389 RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);
390
391
392 final Document emptyDoc = new Document();
393 final int numDocs = random().nextBoolean() ? 1 : atLeast(1000);
394 for (int i = 0; i < numDocs; ++i) {
395 iw.addDocument(emptyDoc);
396 }
397 iw.commit();
398 final DirectoryReader rd = DirectoryReader.open(dir);
399 for (int i = 0; i < numDocs; ++i) {
400 final Document doc = rd.document(i);
401 assertNotNull(doc);
402 assertTrue(doc.getFields().isEmpty());
403 }
404 rd.close();
405
406 iw.close();
407 dir.close();
408 }
409
410 public void testConcurrentReads() throws Exception {
411 Directory dir = newDirectory();
412 IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
413 iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
414 RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);
415
416
417 final Document doc = new Document();
418 final Field field = new StringField("fld", "", Store.YES);
419 doc.add(field);
420 final int numDocs = atLeast(1000);
421 for (int i = 0; i < numDocs; ++i) {
422 field.setStringValue("" + i);
423 iw.addDocument(doc);
424 }
425 iw.commit();
426
427 final DirectoryReader rd = DirectoryReader.open(dir);
428 final IndexSearcher searcher = new IndexSearcher(rd);
429 final int concurrentReads = atLeast(5);
430 final int readsPerThread = atLeast(50);
431 final List<Thread> readThreads = new ArrayList<>();
432 final AtomicReference<Exception> ex = new AtomicReference<>();
433 for (int i = 0; i < concurrentReads; ++i) {
434 readThreads.add(new Thread() {
435
436 int[] queries;
437
438 {
439 queries = new int[readsPerThread];
440 for (int i = 0; i < queries.length; ++i) {
441 queries[i] = random().nextInt(numDocs);
442 }
443 }
444
445 @Override
446 public void run() {
447 for (int q : queries) {
448 final Query query = new TermQuery(new Term("fld", "" + q));
449 try {
450 final TopDocs topDocs = searcher.search(query, 1);
451 if (topDocs.totalHits != 1) {
452 throw new IllegalStateException("Expected 1 hit, got " + topDocs.totalHits);
453 }
454 final Document sdoc = rd.document(topDocs.scoreDocs[0].doc);
455 if (sdoc == null || sdoc.get("fld") == null) {
456 throw new IllegalStateException("Could not find document " + q);
457 }
458 if (!Integer.toString(q).equals(sdoc.get("fld"))) {
459 throw new IllegalStateException("Expected " + q + ", but got " + sdoc.get("fld"));
460 }
461 } catch (Exception e) {
462 ex.compareAndSet(null, e);
463 }
464 }
465 }
466 });
467 }
468 for (Thread thread : readThreads) {
469 thread.start();
470 }
471 for (Thread thread : readThreads) {
472 thread.join();
473 }
474 rd.close();
475 if (ex.get() != null) {
476 throw ex.get();
477 }
478
479 iw.close();
480 dir.close();
481 }
482
483 private byte[] randomByteArray(int length, int max) {
484 final byte[] result = new byte[length];
485 for (int i = 0; i < length; ++i) {
486 result[i] = (byte) random().nextInt(max);
487 }
488 return result;
489 }
490
491 public void testWriteReadMerge() throws IOException {
492
493 final Codec otherCodec;
494 if ("SimpleText".equals(Codec.getDefault().getName())) {
495 otherCodec = TestUtil.getDefaultCodec();
496 } else {
497 otherCodec = new SimpleTextCodec();
498 }
499 Directory dir = newDirectory();
500 IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
501 iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
502 RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);
503
504 final int docCount = atLeast(200);
505 final byte[][][] data = new byte [docCount][][];
506 for (int i = 0; i < docCount; ++i) {
507 final int fieldCount = rarely()
508 ? RandomInts.randomIntBetween(random(), 1, 500)
509 : RandomInts.randomIntBetween(random(), 1, 5);
510 data[i] = new byte[fieldCount][];
511 for (int j = 0; j < fieldCount; ++j) {
512 final int length = rarely()
513 ? random().nextInt(1000)
514 : random().nextInt(10);
515 final int max = rarely() ? 256 : 2;
516 data[i][j] = randomByteArray(length, max);
517 }
518 }
519
520 final FieldType type = new FieldType(StringField.TYPE_STORED);
521 type.setIndexOptions(IndexOptions.NONE);
522 type.freeze();
523 IntField id = new IntField("id", 0, Store.YES);
524 for (int i = 0; i < data.length; ++i) {
525 Document doc = new Document();
526 doc.add(id);
527 id.setIntValue(i);
528 for (int j = 0; j < data[i].length; ++j) {
529 Field f = new Field("bytes" + j, data[i][j], type);
530 doc.add(f);
531 }
532 iw.w.addDocument(doc);
533 if (random().nextBoolean() && (i % (data.length / 10) == 0)) {
534 iw.w.close();
535 IndexWriterConfig iwConfNew = newIndexWriterConfig(new MockAnalyzer(random()));
536
537 if (iwConf.getCodec() == otherCodec) {
538 iwConfNew.setCodec(Codec.getDefault());
539 } else {
540 iwConfNew.setCodec(otherCodec);
541 }
542 iwConf = iwConfNew;
543 iw = new RandomIndexWriter(random(), dir, iwConf);
544 }
545 }
546
547 for (int i = 0; i < 10; ++i) {
548 final int min = random().nextInt(data.length);
549 final int max = min + random().nextInt(20);
550 iw.deleteDocuments(NumericRangeQuery.newIntRange("id", min, max, true, false));
551 }
552
553 iw.forceMerge(2);
554
555 iw.commit();
556
557 final DirectoryReader ir = DirectoryReader.open(dir);
558 assertTrue(ir.numDocs() > 0);
559 int numDocs = 0;
560 for (int i = 0; i < ir.maxDoc(); ++i) {
561 final Document doc = ir.document(i);
562 if (doc == null) {
563 continue;
564 }
565 ++ numDocs;
566 final int docId = doc.getField("id").numericValue().intValue();
567 assertEquals(data[docId].length + 1, doc.getFields().size());
568 for (int j = 0; j < data[docId].length; ++j) {
569 final byte[] arr = data[docId][j];
570 final BytesRef arr2Ref = doc.getBinaryValue("bytes" + j);
571 final byte[] arr2 = Arrays.copyOfRange(arr2Ref.bytes, arr2Ref.offset, arr2Ref.offset + arr2Ref.length);
572 assertArrayEquals(arr, arr2);
573 }
574 }
575 assertTrue(ir.numDocs() <= numDocs);
576 ir.close();
577
578 iw.deleteAll();
579 iw.commit();
580 iw.forceMerge(1);
581
582 iw.close();
583 dir.close();
584 }
585
586
587 private static class DummyFilterLeafReader extends FilterLeafReader {
588
589 public DummyFilterLeafReader(LeafReader in) {
590 super(in);
591 }
592
593 @Override
594 public void document(int docID, StoredFieldVisitor visitor) throws IOException {
595 super.document(maxDoc() - 1 - docID, visitor);
596 }
597
598 }
599
600 private static class DummyFilterDirectoryReader extends FilterDirectoryReader {
601
602 public DummyFilterDirectoryReader(DirectoryReader in) throws IOException {
603 super(in, new SubReaderWrapper() {
604 @Override
605 public LeafReader wrap(LeafReader reader) {
606 return new DummyFilterLeafReader(reader);
607 }
608 });
609 }
610
611 @Override
612 protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
613 return new DummyFilterDirectoryReader(in);
614 }
615
616 }
617
618 public void testMergeFilterReader() throws IOException {
619 Directory dir = newDirectory();
620 RandomIndexWriter w = new RandomIndexWriter(random(), dir);
621 final int numDocs = atLeast(200);
622 final String[] stringValues = new String[10];
623 for (int i = 0; i < stringValues.length; ++i) {
624 stringValues[i] = RandomStrings.randomRealisticUnicodeOfLength(random(), 10);
625 }
626 Document[] docs = new Document[numDocs];
627 for (int i = 0; i < numDocs; ++i) {
628 Document doc = new Document();
629 doc.add(new StringField("to_delete", random().nextBoolean() ? "yes" : "no", Store.NO));
630 doc.add(new StoredField("id", i));
631 doc.add(new StoredField("i", random().nextInt(50)));
632 doc.add(new StoredField("l", random().nextLong()));
633 doc.add(new StoredField("d", random().nextDouble()));
634 doc.add(new StoredField("f", random().nextFloat()));
635 doc.add(new StoredField("s", RandomPicks.randomFrom(random(), stringValues)));
636 doc.add(new StoredField("b", new BytesRef(RandomPicks.randomFrom(random(), stringValues))));
637 docs[i] = doc;
638 w.addDocument(doc);
639 }
640 if (random().nextBoolean()) {
641 w.deleteDocuments(new Term("to_delete", "yes"));
642 }
643 w.commit();
644 w.close();
645
646 DirectoryReader reader = new DummyFilterDirectoryReader(DirectoryReader.open(dir));
647
648 Directory dir2 = newDirectory();
649 w = new RandomIndexWriter(random(), dir2);
650 TestUtil.addIndexesSlowly(w.w, reader);
651 reader.close();
652 dir.close();
653
654 reader = w.getReader();
655 for (int i = 0; i < reader.maxDoc(); ++i) {
656 final Document doc = reader.document(i);
657 final int id = doc.getField("id").numericValue().intValue();
658 final Document expected = docs[id];
659 assertEquals(expected.get("s"), doc.get("s"));
660 assertEquals(expected.getField("i").numericValue(), doc.getField("i").numericValue());
661 assertEquals(expected.getField("l").numericValue(), doc.getField("l").numericValue());
662 assertEquals(expected.getField("d").numericValue(), doc.getField("d").numericValue());
663 assertEquals(expected.getField("f").numericValue(), doc.getField("f").numericValue());
664 assertEquals(expected.getField("b").binaryValue(), doc.getField("b").binaryValue());
665 }
666
667 reader.close();
668 w.close();
669 TestUtil.checkIndex(dir2);
670 dir2.close();
671 }
672
673 @Nightly
674 public void testBigDocuments() throws IOException {
675
676
677
678
679 Directory dir = new MockDirectoryWrapper(random(), new MMapDirectory(createTempDir("testBigDocuments")));
680 IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
681 iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
682 RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);
683
684 if (dir instanceof MockDirectoryWrapper) {
685 ((MockDirectoryWrapper) dir).setThrottling(Throttling.NEVER);
686 }
687
688 final Document emptyDoc = new Document();
689 final Document bigDoc1 = new Document();
690 final Document bigDoc2 = new Document();
691
692 final Field idField = new StringField("id", "", Store.NO);
693 emptyDoc.add(idField);
694 bigDoc1.add(idField);
695 bigDoc2.add(idField);
696
697 final FieldType onlyStored = new FieldType(StringField.TYPE_STORED);
698 onlyStored.setIndexOptions(IndexOptions.NONE);
699
700 final Field smallField = new Field("fld", randomByteArray(random().nextInt(10), 256), onlyStored);
701 final int numFields = RandomInts.randomIntBetween(random(), 500000, 1000000);
702 for (int i = 0; i < numFields; ++i) {
703 bigDoc1.add(smallField);
704 }
705
706 final Field bigField = new Field("fld", randomByteArray(RandomInts.randomIntBetween(random(), 1000000, 5000000), 2), onlyStored);
707 bigDoc2.add(bigField);
708
709 final int numDocs = atLeast(5);
710 final Document[] docs = new Document[numDocs];
711 for (int i = 0; i < numDocs; ++i) {
712 docs[i] = RandomPicks.randomFrom(random(), Arrays.asList(emptyDoc, bigDoc1, bigDoc2));
713 }
714 for (int i = 0; i < numDocs; ++i) {
715 idField.setStringValue("" + i);
716 iw.addDocument(docs[i]);
717 if (random().nextInt(numDocs) == 0) {
718 iw.commit();
719 }
720 }
721 iw.commit();
722 iw.forceMerge(1);
723 final DirectoryReader rd = DirectoryReader.open(dir);
724 final IndexSearcher searcher = new IndexSearcher(rd);
725 for (int i = 0; i < numDocs; ++i) {
726 final Query query = new TermQuery(new Term("id", "" + i));
727 final TopDocs topDocs = searcher.search(query, 1);
728 assertEquals("" + i, 1, topDocs.totalHits);
729 final Document doc = rd.document(topDocs.scoreDocs[0].doc);
730 assertNotNull(doc);
731 final IndexableField[] fieldValues = doc.getFields("fld");
732 assertEquals(docs[i].getFields("fld").length, fieldValues.length);
733 if (fieldValues.length > 0) {
734 assertEquals(docs[i].getFields("fld")[0].binaryValue(), fieldValues[0].binaryValue());
735 }
736 }
737 rd.close();
738 iw.close();
739 dir.close();
740 }
741
742 public void testBulkMergeWithDeletes() throws IOException {
743 final int numDocs = atLeast(200);
744 Directory dir = newDirectory();
745 RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));
746 for (int i = 0; i < numDocs; ++i) {
747 Document doc = new Document();
748 doc.add(new StringField("id", Integer.toString(i), Store.YES));
749 doc.add(new StoredField("f", TestUtil.randomSimpleString(random())));
750 w.addDocument(doc);
751 }
752 final int deleteCount = TestUtil.nextInt(random(), 5, numDocs);
753 for (int i = 0; i < deleteCount; ++i) {
754 final int id = random().nextInt(numDocs);
755 w.deleteDocuments(new Term("id", Integer.toString(id)));
756 }
757 w.commit();
758 w.close();
759 w = new RandomIndexWriter(random(), dir);
760 w.forceMerge(TestUtil.nextInt(random(), 1, 3));
761 w.commit();
762 w.close();
763 TestUtil.checkIndex(dir);
764 dir.close();
765 }
766
767
768 public void testMismatchedFields() throws Exception {
769 Directory dirs[] = new Directory[10];
770 for (int i = 0; i < dirs.length; i++) {
771 Directory dir = newDirectory();
772 IndexWriterConfig iwc = new IndexWriterConfig(null);
773 IndexWriter iw = new IndexWriter(dir, iwc);
774 Document doc = new Document();
775 for (int j = 0; j < 10; j++) {
776
777 doc.add(new StringField(Integer.toString(j), Integer.toString(j), Field.Store.YES));
778 }
779 for (int j = 0; j < 10; j++) {
780 iw.addDocument(doc);
781 }
782
783 DirectoryReader reader = DirectoryReader.open(iw, true);
784
785 if (random().nextBoolean()) {
786 reader = new MismatchedDirectoryReader(reader, random());
787 }
788 dirs[i] = newDirectory();
789 IndexWriter adder = new IndexWriter(dirs[i], new IndexWriterConfig(null));
790 TestUtil.addIndexesSlowly(adder, reader);
791 adder.commit();
792 adder.close();
793
794 IOUtils.close(reader, iw, dir);
795 }
796
797 Directory everything = newDirectory();
798 IndexWriter iw = new IndexWriter(everything, new IndexWriterConfig(null));
799 iw.addIndexes(dirs);
800 iw.forceMerge(1);
801
802 LeafReader ir = getOnlySegmentReader(DirectoryReader.open(iw, true));
803 for (int i = 0; i < ir.maxDoc(); i++) {
804 Document doc = ir.document(i);
805 assertEquals(10, doc.getFields().size());
806 for (int j = 0; j < 10; j++) {
807 assertEquals(Integer.toString(j), doc.get(Integer.toString(j)));
808 }
809 }
810
811 IOUtils.close(iw, ir, everything);
812 IOUtils.close(dirs);
813 }
814 }